

use "../2_Data/temp/data_covid_province.dta", clear

merge m:1 region date using "../2_Data/temp/data_covid_region.dta", assert(match) nogen

gen week = week(date)
gen dow = dow(date)
*Modify week to be correct. Stata is wrong
replace week = week+1
replace week = week-1 if dow>2 | dow==0

merge m:1 province using "../2_Data/temp/data_covariates.dta", assert(match) nogen

merge m:1 province using "../2_Data/temp/data_std_diff.dta", assert(match using) keep(match) nogen


gen density = population/surface

foreach var of varlist Coastal_surface mount_surface {

replace `var'=`var'/surface

}


gen museums = Museums_state+Museums_other


foreach var of varlist Export museums {
replace `var'=`var'/population
}

gen south=(zone_code>=4)

sort province date

by province: gen daily_increase = totale_casi-totale_casi[_n-1] if province==province[_n-1]

gen totale_casi_origin = totale_casi
gen daily_increase_origin = daily_increase

replace daily_increase = . if daily_increase<0
replace totale_casi = . if daily_increase==.
replace daily_increase=0 if daily_increase==.

*Extrapolate linearly to fix missreporting

sort province date

by province: replace totale_casi = (totale_casi[_n-1]+totale_casi[_n+1])/2 if totale_casi==.
by province: replace totale_casi = totale_casi[_n-1] if totale_casi==.

gen totale_casi_number = totale_casi
gen daily_increase_number = daily_increase

replace totale_casi = (totale_casi/population)*100000
replace daily_increase = (daily_increase/population)*100000

gen post =(date>22149)

encode province, gen(province_code)


gen election = 1 if region == "VENETO" | region == "LIGURIA" | region == "VALLE D'AOSTA" | region == "MARCHE" | region == "CAMPANIA" | region == "TOSCANA" | region == "PUGLIA"
replace election = 0 if election ==. 

gen post_election = election*post

* Provincie di confine

gen boundary1 = 1 if province == "AOSTA" | province =="TORINO" | province=="CUNEO" | province=="BIELLA" | province=="VERCELLI"   

gen boundary2 = 1 if province == "IMPERIA" | province=="SAVONA" | province =="GENOVA" | province=="CUNEO" | province=="ALESSANDRIA"
gen boundary3 = 1 if province == "GENOVA" | province =="LA SPEZIA" | province=="PIACENZA" | province=="PARMA"

gen boundary4 = 1 if province == "BOLZANO" | province=="TRENTO" | province=="VERONA" | province=="VICENZA" | province=="BELLUNO"
gen boundary5 = 1 if province == "PORDENONE" | province=="UDINE" | province=="GORIZIA" | province=="VENEZIA" | province=="TREVISO" | province=="BELLUNO"
gen boundary6 = 1 if province == "MANTOVA" | province=="BRESCIA" | province=="VERONA" | province=="ROVIGO"
gen boundary7 = 1 if province =="FERRARA" | province =="ROVIGO"

gen boundary8 = 1 if province=="MASSA CARRARA" | province=="LUCCA" | province=="PISTOIA" | province=="PRATO" | province=="FIRENZE" | province=="AREZZO" | province=="RIMINI" | province=="FORLI'-CESENA" | province=="RAVENNA" | province=="BOLOGNA" | province=="REGGIO NELL'EMILIA" | province=="MODENA" | province=="PARMA"
gen boundary9 = 1 if province=="AREZZO" | province=="SIENA" | province=="TERNI" | province=="PERUGIA"
gen boundary10 = 1 if province=="GROSSETO" | province=="SIENA" | province=="VITERBO"

gen boundary11 = 1 if province=="RIMINI" | province=="PESARO E URBINO"
gen boundary12 = 1 if province=="PESARO E URBINO" | province=="ANCONA" | province =="MACERATA" | province=="ASCOLI PICENO" | province=="PERUGIA"
gen boundary13 = 1 if province=="ASCOLI PICENO" | province=="RIETI"
gen boundary14 = 1 if province=="ASCOLI PICENO" | province=="TERAMO"

gen boundary15 = 1 if province=="CASERTA" | province=="LATINA" | province=="FROSINONE"
gen boundary16 = 1 if province=="CASERTA" | province=="ISERNIA" | province=="CAMPOBASSO" | province=="BENEVENTO"
gen boundary17 = 1 if province=="AVELLINO" | province=="SALERNO" | province=="POTENZA"

gen boundary18 = 1 if province=="FOGGIA" | province=="CAMPOBASSO"
gen boundary19 = 1 if province=="BARLETTA-ANDRIA-TRANI" | province=="BARI" | province=="TARANTO"| province=="MATERA" | province=="POTENZA"

* Regioni di confine

gen boundary_region1 = 1 if region =="VALLE D'AOSTA" | region=="PIEMONTE"
gen boundary_region2 = 1 if region =="VENETO" | region=="LOMBARDIA" | strpos(region,"ROMAGNA")!=0 | strpos(region, "TRENTO")!=0 | strpos(region,"BOLZANO")!=0 | strpos(region,"FRIULI")!=0
gen boundary_region3 = 1 if region =="LIGURIA" | region=="PIEMONTE" | strpos(region,"ROMAGNA")!=0
gen boundary_region4 = 1 if region =="TOSCANA" | region=="UMBRIA" | region=="LAZIO" | strpos(region,"ROMAGNA")!=0
gen boundary_region5 = 1 if region=="MARCHE" | region=="UMBRIA" | region=="ABRUZZO" | strpos(region,"ROMAGNA")!=0
gen boundary_region6 = 1 if region=="CAMPANIA" | region=="LAZIO" | region=="MOLISE" | region=="BASILICATA"
gen boundary_region7 = 1 if region=="PUGLIA" | region=="MOLISE" | region=="BASILICATA"




forvalues i = 1/19 {
replace boundary`i'=0 if boundary`i'==.
}

forvalues i = 1/7 {
replace boundary_region`i'=0 if boundary_region`i'==.
}
gen boundary_sample = 1 if boundary1==1

forvalues i = 2 / 19 {
replace boundary_sample = 1 if boundary`i'==1
}
replace boundary_sample=0 if boundary_sample==.

replace totale_casi = log(1+totale_casi) // gen log variables
replace daily_increase = log(1+daily_increase) // gen log variables

*Gen dynamic treatment indicators
local j = 34

forvalues i = 1/10 {
local j = `j'-1
gen pla`i' = (week==`j')*election
la var pla`i' "-`i'"
}

*replace pla5=1 if pla6==1 | pla7==1 | pla8==1 | pla9==1 | pla10==1


gen cam = (week==34)*election
la var cam "0"

local j = 34

forvalues i = 1/9 {
local j = `j'+1
gen cam`i' = (week==`j')*election
la var cam`i' "+`i'"
}

*replace cam7 = 1 if cam8==1 | cam9==1

replace pla1 = 1
*replace pla7=1

preserve
collapse (first) population region, by(province)
egen pop_region = sum(population), by(region)
collapse (first) pop_region, by(region)

tempfile pop_region 
save `pop_region', replace

restore

merge m:1 region using `pop_region', assert(match) nogen
gen tamponi_prov = tamponi*population/pop_region
gen tamponi_prov_day = tamponi_day*population/pop_region

gen ratio1 = totale_casi_number/tamponi_prov



replace population = population/100000

levelsof codice_regione, local(prov)

gen trend = (week-34)*(post-1) // weekly pre-campaign trend

foreach province in `prov' {
    
	gen trend_prov`province'= (codice_regione==`province')*trend
}

gen boundary=0
forvalues i = 1/19 {
replace boundary=`i' if boundary`i'==1
}

gen boundary_region=0
forvalues i = 1/7 {
replace boundary_region=`i' if boundary_region`i'==1
}

egen time_fe = group(date boundary)
egen time_fe_region = group(date boundary_region)

*merge 1:1 province date using "../2_Data/temp/google_data.dta", keep(match master) nogen

la var daily_increase "log(New daily positive per 100K inh.)"
la var totale_casi "log(Total Cases per 100K inh.)"
la var ratio1 "Share of positive COVID-19 tests"


la var post_election "Campaign $\times$ Post"
la var election "Campaign"

la var population "Population ($\times$ 100K)"
la var surface "Surface (km$^{2}$)"
la var density "Pop. density (inh./km$^{2}$)"
la var turnout "Voter turnout"

la var turnout_sunday12 "Voter turnout"
la var turnout_sunday19 "Voter turnout"
la var turnout_sunday23 "Voter turnout"
la var turnout_final "Voter turnout"

foreach var of varlist totale_casi daily_increase ratio1 {
	
	egen mean_`var'=wtmean(`var'), by(week election) weight(population)
}

la var mean_daily_increase "log(New daily positive per 100K inh.)"
la var mean_totale_casi "log(Total Cases per 100K inh.)"
la var mean_ratio1 "Share of positive COVID-19 tests"

keep if date<=22203 // Restrict sample to observations until October 15 (regional-level policies come)


merge m:1 province using "../2_Data/temp/data_coordinates.dta", assert(match master) nogen // merge with coordinates data to get the correct Conley (1999) HAC standard errors

* Manual fix for missing regions: 
replace _x = 41.18 if province =="BARLETTA-ANDRIA-TRANI"
replace _y = 16.17 if province=="BARLETTA-ANDRIA-TRANI" // source: http://www.comuni-italiani.it/110/002/clima.html

replace _x = 43.10 if province=="FERMO"
replace _y = 13.43 if province=="FERMO" // source: http://www.comuni-italiani.it/109/006/clima.html

replace _x = 45.35 if province=="MONZA E DELLA BRIANZA"
replace _y = 9.16 if province=="MONZA E DELLA BRIANZA" // source: http://www.comuni-italiani.it/108/033/clima.html

replace _x = 39.10 if province=="SUD SARDEGNA"
replace _y = 8.31 if province=="SUD SARDEGNA"

preserve
keep if date>22094

compress
save "../2_Data/temp/data_selected.dta", replace
restore

sort province date

collapse (first) south post pop_region week post_election election codice_regione ricoverati terapia totale_osp isolamento variazione nuovi dimessi deceduti tamponi casi_testati time_fe time_fe_region ///
(sum) mount_surface surface population totale_casi_number daily_increase_number Coastal_surface museums Export (max) boundary_sample (firstnm) x_region y_region (mean) share_municipal density turnout* ratio1 Hospital_migration Unemployment Employment* RSA Fiber Mobility Tourism Childcare  [aw=population], by(region date) 

      
sort region date

* Manual fix of reporting error: source https://www.regione.abruzzo.it/content/coronavirus-abruzzo-dati-aggiornati-al-27-luglio-casi-positivi-3368

replace tamponi = 124891 if region=="ABRUZZO" & date==d(26jul2020)

* Manual fix of the fact that ABRUZZO did not report on a daily basis on 19, 20, 21 september, but only on a three-day aggregate. We found daily figures of new positives, but not of new tests or deceased. Let's linearly interpolate.
*Source. https://www.rete8.it/approfondimenti/covid-abruzzo-bollettino-21-settembre-72-nuovi-positivi-negli-ultimi-tre-giorni/
replace daily_increase=23 if region=="ABRUZZO" & date==d(19sep2020)
replace daily_increase=36 if region=="ABRUZZO" & date==d(20sep2020)
replace daily_increase=13 if region=="ABRUZZO" & date==d(21sep2020)

replace totale_casi_number = totale_casi_number[_n-1] + daily_increase if region=="ABRUZZO" & date==d(19sep2020) | date==d(20sep2020) | date==d(21sep2020)

by region: gen tamponi_day = tamponi-tamponi[_n-1]

local t = tamponi[208] // manually checked
local t1 = tamponi[211] // manually checked
replace tamponi_day = (`t1'-`t')/3 if region=="ABRUZZO" & (date==d(19sep2020) | date==d(20sep2020) | date==d(21sep2020))


by region: gen deceduti_day = deceduti-deceduti[_n-1]

local t = deceduti[208] // manually checked
local t1 = deceduti[211] // manually checked
replace deceduti_day = (`t1'-`t')/3 if region=="ABRUZZO" & (date==d(19sep2020) | date==d(20sep2020) | date==d(21sep2020))

by region: gen totale_terapia = terapia + terapia[_n+1]

local j = 34

forvalues i = 1/10 {
local j = `j'-1
gen pla`i' = (week==`j')*election
la var pla`i' "-`i'"
}

gen cam = (week==34)*election
la var cam "0"

local j = 34

forvalues i = 1/9 {
local j = `j'+1
gen cam`i' = (week==`j')*election
la var cam`i' "+`i'"
}

replace pla1 = 1
replace pla7 = 1

gen trend = (week-34)*(post-1) // weekly pre-campaign trend

gen totale_casi = totale_casi_number

cap drop daily_increase_number

gen daily_increase = nuovi_positivi


* Manual fix: missing deceased data on 1/07/2020. Source: https://www.repubblica.it/cronaca/2020/07/01/news/coronavirus_il_bollettino_di_oggi_142_nuovi_casi_21_morti-260700083/c
* Source: https://www.quotidiano.net/cronaca/coronavirus-italia-bollettino-1-luglio-1.5279356

replace deceduti_day = 6 if region=="LOMBARDIA" & date==22097
replace deceduti_day = 4 if region=="VENETO" & date==22097
replace deceduti_day = 2 if region=="LAZIO" & date==22097
replace deceduti_day = 4 if region=="EMILIA-ROMAGNA" & date==22097
replace deceduti_day = 2 if region=="TOSCANA" & date==22097
replace deceduti_day = 1 if region=="PIEMONTE" & date==22097

replace deceduti_day = 0 if date==22097 & region!="LOMBARDIA" & region!="VENETO" & region!="LAZIO" & region!="EMILIA-ROMAGNA" & region!="TOSCANA" & region!="PIEMONTE"

replace deceduti_day = 0 if region=="SARDEGNA" & date==22124 // source: https://www.regione.sardegna.it/j/v/2568?s=411982&v=2&c=94255&t=1

* Manual fix: clear reporting error (raw data say 155 deceased in one region in one date. Unbelievable).
replace deceduti_day = 1 if region=="EMILIA-ROMAGNA" & date==d(15aug2020) // source https://www.ilrestodelcarlino.it/cronaca/bollettino-covid-oggi-15-agosto-emilia-romagna-1.5418753

* Manual fix: Basilicata has aggregated figures from two different dates for a while. 

replace tamponi_day =  122 if region=="BASILICATA" & date==d(05jul2020) // https://www.regione.basilicata.it/giunta/site/Giunta/detail.jsp?sec=100133&otype=1012&id=3067278&value=regione
replace tamponi_day =  123 if region=="BASILICATA" & date==d(06jul2020) // https://www.regione.basilicata.it/giunta/site/Giunta/detail.jsp?sec=100133&otype=1012&id=3067278&value=regione

replace tamponi_day = 154 if region=="BASILICATA" & date==d(12jul2020) // https://www.regione.basilicata.it/giunta/site/Giunta/detail.jsp?otype=1012&id=3067449
replace tamponi_day = 155 if region=="BASILICATA" & date==d(13jul2020) // https://www.regione.basilicata.it/giunta/site/Giunta/detail.jsp?otype=1012&id=3067449

replace tamponi_day = 125 if region=="BASILICATA" & date==d(19jul2020) // https://www.regione.basilicata.it/giunta/site/giunta/detail.jsp?otype=1012&id=3067651
replace tamponi_day = 126 if region=="BASILICATA" & date==d(20jul2020) // https://www.regione.basilicata.it/giunta/site/giunta/detail.jsp?otype=1012&id=3067651

replace daily_increase = 1 if region=="BASILICATA" & date==d(19jul2020) // https://www.regione.basilicata.it/giunta/site/giunta/detail.jsp?otype=1012&id=3067651
replace daily_increase = 2 if region=="BASILICATA" & date==d(20jul2020) // https://www.regione.basilicata.it/giunta/site/giunta/detail.jsp?otype=1012&id=3067651

replace tamponi_day = 128 if region=="BASILICATA" & date==d(26jul2020) // https://www.regione.basilicata.it/giunta/site/giunta/detail.jsp?sec=100133&otype=1012&id=3067854&value=regione
replace tamponi_day = 129 if region=="BASILICATA" & date==d(27jul2020) // https://www.regione.basilicata.it/giunta/site/giunta/detail.jsp?sec=100133&otype=1012&id=3067854&value=regione

replace tamponi_day = 136 if region=="BASILICATA" & date==d(09aug2020) //  https://www.regione.basilicata.it/giunta/site/Giunta/detail.jsp?otype=1012&id=3068221
replace tamponi_day = 137 if region=="BASILICATA" & date==d(10aug2020) // https://www.regione.basilicata.it/giunta/site/Giunta/detail.jsp?otype=1012&id=3068221

 
replace totale_osp = ricoverati_con_sintomi

replace x_region=. if region=="P.A. BOLZANO"
replace y_region=. if region=="P.A. BOLZANO"

replace region="TRENTINO-ALTO ADIGE" if strpos(region,"P.A.")!=0
replace codice_regione = 4 if codice_regione>=21


collapse (sum) ricoverati terapia totale_osp isolamento deceduti tamponi tamponi_day deceduti_day totale_terapia totale_casi daily_increase pop_region mount_surface Coastal_surface surface (firstnm)  south cam* pla* week trend time_fe_region post_election election region x_region y_region boundary_sample  ///
(mean) share_municipal density museums Export turnout* Hospital_migration Unemployment Employment* RSA Fiber Mobility Tourism Childcare [aw=population], by(codice_regione date)

gen ratio2 = daily_increase/tamponi_day*100

egen region_code = group(region)

foreach var of varlist ricoverati terapia totale_osp isolamento deceduti tamponi tamponi_day deceduti_day totale_terapia totale_casi daily_increase {
	gen number_`var'=`var'/pop_region*100000
	replace `var' = log(1+(`var'/pop_region)*100000)
}

gen number_ratio2 = ratio2
replace ratio2 = log(1+ratio2)

la var ratio2 "log(Daily share of positive tests)"

la var totale_casi "\scriptsize{log(Total Cases per 100K inh.)}"
la var post_election "Post $\times$ Campaign"
la var deceduti "\scriptsize{log(Total deceased per 100K inh.)}"
la var terapia "\scriptsize{log(Patients in ICU per 100K inh.)}"
la var totale_osp "\scriptsize{log(Total hospitalized per 100K inh.)}"
la var tamponi "\scriptsize{log(Total number of tests per 100K inh.)}"

keep if date>22094

sort region date

foreach var of varlist tamponi_day daily_increase ratio2 totale_osp terapia deceduti_day {

replace `var'= (`var'+`var'[_n-1]+`var'[_n-2]+`var'[_n-3]+`var'[_n-4]+`var'[_n-5]+`var'[_n-6])/ 7 if region==region[_n-6] // moving average
replace `var'= . if region!=region[_n-6]
replace number_`var'=(number_`var'+number_`var'[_n-1]+number_`var'[_n-2]+number_`var'[_n-3]+number_`var'[_n-4]+number_`var'[_n-5]+number_`var'[_n-6])/ 7 if region==region[_n-6]
replace number_`var'= . if region!=region[_n-6]
}

compress
save "../2_Data/temp/data_selected_region.dta", replace
